#################################################################################
### Racialized Misinformation, Factual Corrections, and Prejudicial Attitudes ###
### [Minimum Detectable Effects Analysis for Study 2]                         ###
### Authors: Eddy S. F. Yeung, Joseph Glasgow                                 ###
### Date: June 26, 2025                                                       ###
#################################################################################

### Set-up ----
## Clean the working environment and set the working directory
# rm(list = ls()) removes the objects that ls() lists in the current
# environment; it does not detach packages that are already loaded.
# NOTE(review): clearing the workspace and hard-coding a path tie the script to
# one user's session and machine -- consider running it from the project folder.
rm(list = ls())
# Set your working directory here; it should also contain the survey data
# ("study2_dataset.csv"), because the file is read below by a relative path.
setwd("~/Desktop/racialized_misinfo/replication/Study 2")

## Import the dataset
# Reads the Study 2 survey data into the data frame `df` used by the rest of
# the script.
df <- read.csv("study2_dataset.csv")

### Code outcome variables ----
## Fail early if an expected survey column is absent: `df$name` returns NULL for
## a missing column and cbind() silently drops NULL arguments, so a misspelled
## or missing item would otherwise shrink the index without any error
resent_items <- paste0("racial_resent_", 1:5)
required_cols <- c("explicit_prej_1", "explicit_prej_2", resent_items)
missing_cols <- setdiff(required_cols, names(df))
if (length(missing_cols) > 0) {
  stop("Missing column(s) in the dataset: ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

## Explicit prejudice (difference between Blacks' laziness and Whites' laziness)
# `[[` matches column names exactly, unlike `$`, which allows partial matching
df$explicit_prej <- df[["explicit_prej_2"]] - df[["explicit_prej_1"]]

## Racial resentment (mean effects index with imputation)
# One column per racial resentment item, in item order; unname() drops the
# column names so the matrix is the same object that cbind() of the five
# item vectors would produce
rr_matrix <- unname(as.matrix(df[, resent_items, drop = FALSE]))
## Mean effects index (MEI): standardizes every outcome against the control
## group, averages the standardized outcomes within each row, and standardizes
## that average against the control group again.
##
## Arguments:
##   Z            Treatment indicator with one entry per row of outcome_mat;
##                rows with Z == 0 form the control group and rows with Z == 1
##                the treatment group.
##   outcome_mat  Numeric matrix (or data frame) with one column per outcome.
##   to_reorient  Accepted for interface compatibility; not used by this code.
##   reorient     Accepted for interface compatibility; not used by this code.
##   greedy       If TRUE, each row is averaged over its non-missing outcomes;
##                if FALSE, the row sum is divided by the number of columns, so
##                a row with any missing outcome gets NA.
##   impute       If TRUE, missing outcomes are first replaced by the mean of
##                that outcome within the row's own experimental arm.
## Returns: a numeric vector with one index value per row of outcome_mat; the
## control group has mean 0 and standard deviation 1 on this index.
cal_MEI <- function(Z, outcome_mat, to_reorient = NULL, reorient = FALSE,
                    greedy = TRUE, impute = FALSE) {
  outcome_mat <- as.matrix(outcome_mat)
  if (length(Z) != nrow(outcome_mat)) {
    stop("Z must have one entry per row of outcome_mat", call. = FALSE)
  }
  greedy <- isTRUE(as.logical(greedy))
  impute <- isTRUE(as.logical(impute))

  # Column-wise summary over the rows of one experimental arm. which() skips
  # rows with a missing Z, and drop = FALSE keeps a one-column input a matrix.
  arm_stat <- function(mat, arm, fun) {
    apply(X = mat[which(Z == arm), , drop = FALSE], MARGIN = 2, FUN = fun,
          na.rm = TRUE)
  }

  if (impute) {
    is_missing <- 1 * is.na(outcome_mat)
    # Row 1 holds the control-arm means, row 2 the treatment-arm means, so
    # indexing by Z + 1 picks the means of each unit's own arm
    arm_means <- rbind(arm_stat(outcome_mat, 0, mean),
                       arm_stat(outcome_mat, 1, mean))
    fill_values <- is_missing * arm_means[Z + 1, , drop = FALSE]
    outcome_mat[is.na(outcome_mat)] <- 0
    outcome_mat <- outcome_mat + fill_values
  }

  # Standardize each outcome with the control-group mean and SD
  ctrl_mean <- arm_stat(outcome_mat, 0, mean)
  ctrl_sd <- arm_stat(outcome_mat, 0, sd)
  z_score <- sweep(sweep(outcome_mat, 2, ctrl_mean, "-"), 2, ctrl_sd, "/")

  if (greedy) {
    # Average over the outcomes that are present. The numerator must skip
    # missing values too; otherwise any missing outcome turns the row into NA
    # and the greedy denominator never matters.
    n_outcomes <- rowSums(!is.na(z_score))
    n_outcomes[n_outcomes == 0] <- NA # no outcome observed: index stays NA
    index <- rowSums(z_score, na.rm = TRUE) / n_outcomes
  } else {
    index <- rowSums(z_score) / ncol(outcome_mat)
  }

  # Re-standardize the index against the control group
  ctrl_index <- index[which(Z == 0)]
  (index - mean(ctrl_index, na.rm = TRUE)) / sd(ctrl_index, na.rm = TRUE)
}
## Add the racial resentment index to the data with a base R assignment; the
## script never loads a package that provides `%>%` or mutate(), so the piped
## version fails in a fresh R session
df$resent <- cal_MEI(Z = df$treatment, outcome_mat = rr_matrix, impute = TRUE)

### Conduct minimum detectable effects analysis on explicit prejudice ----
## Standard deviation of the outcome within each experimental arm (control
## first, then treatment); wrapping an assignment in parentheses also prints
## the assigned value
(sd.ctrl <- sd(df$explicit_prej[which(df$treatment == 0)], na.rm = TRUE))
(sd.treat <- sd(df$explicit_prej[which(df$treatment == 1)], na.rm = TRUE))

## Number of non-missing outcome observations within each experimental arm
(n.ctrl <- sum(!is.na(df$explicit_prej[which(df$treatment == 0)])))
(n.treat <- sum(!is.na(df$explicit_prej[which(df$treatment == 1)])))

## MDE calculation
# A true effect must lie at least 2.8 standard errors from zero to be detected
# with 80% probability using 95% confidence intervals (Gelman and Hill 2006).
# The standard error of the ATE is estimated following equation 3.6 in Gerber
# and Green (2012), so the MDE is 2.8 multiplied by that standard error.
# NOTE(review): each arm's n is halved in the denominators below -- confirm
# that this matches the intended design.
MDE <- 2.8 * sqrt(sd.ctrl^2 / (n.ctrl / 2) + sd.treat^2 / (n.treat / 2))
# MDE in Cohen's d: the MDE divided by the SD of the outcome in the full sample
MDE / sd(df$explicit_prej, na.rm = TRUE)

### Conduct minimum detectable effects analysis on racial resentment ----
## Standard deviation of the outcome within each experimental arm (control
## first, then treatment); wrapping an assignment in parentheses also prints
## the assigned value
(sd.ctrl <- sd(df$resent[which(df$treatment == 0)], na.rm = TRUE))
(sd.treat <- sd(df$resent[which(df$treatment == 1)], na.rm = TRUE))

## Number of non-missing outcome observations within each experimental arm
(n.ctrl <- sum(!is.na(df$resent[which(df$treatment == 0)])))
(n.treat <- sum(!is.na(df$resent[which(df$treatment == 1)])))

## MDE calculation
# A true effect must lie at least 2.8 standard errors from zero to be detected
# with 80% probability using 95% confidence intervals (Gelman and Hill 2006).
# The standard error of the ATE is estimated following equation 3.6 in Gerber
# and Green (2012), so the MDE is 2.8 multiplied by that standard error.
# NOTE(review): each arm's n is halved in the denominators below -- confirm
# that this matches the intended design.
MDE <- 2.8 * sqrt(sd.ctrl^2 / (n.ctrl / 2) + sd.treat^2 / (n.treat / 2))
# MDE in Cohen's d: the MDE divided by the SD of the outcome in the full sample
MDE / sd(df$resent, na.rm = TRUE)
